#---------------------------------------------------------------------------#
#                                CHAPTER THREE                              #
#                       Structure of Countryside at War                     #
#                                                                           #
#                                   (2022)                                  #
#___________________________________________________________________________#
# Working_directory 
#
setwd("C:/Campo.xlsx")
#
# Import Data
#
Campo<-column_to_rownames(Rural_Col, var = "Cod_Mun")
#
# Activate libraries
library(Hmisc)
library(psych)
library(car)
library(texreg)
library(jtools)
library(sandwich)
library(dplyr)
library(tidyverse)
library(stargazer) 
#
#The data set is without missing data #
#
#
# Variables
#
Poverty<-Campo$Rural_Pov_
RTP<-Campo$RTPI_N
Rural_Inq<-Campo$Gini
Imports<-Campo$Impor_N
Violence<-Campo$Homi_thou
Rebellion<-Campo$Subver_act
Forced_Dis<-Campo$Forced_Dis
Coca<-Campo$Coca_crops
Infrastructure<-Campo$Infr_Ind
Agro_HiEdu<-Campo$Agro_Edu
Distance<-Campo$Distance
Population<-Campo$Popul
Area<-Campo$Area
#
#Explore variables
par(mfrow=c(2,2))
plot(density(Poverty))
plot(density(RTP))
plot(density(Rural_Inq))
plot(density(Imports))
plot(density(Violence))
plot(density(Rebellion))
plot(density(Forced_Dis))
plot(density(Coca))
plot(density(Infrastructure))
plot(density(Agro_HiEdu))
plot(density(Distance))
plot(density(Population))
plot(density(Area))

plot(Poverty)
hist(Poverty, breaks = 20)
summary(Poverty)
#
#
##Previous data sets
#
Campo1<-data.frame(Poverty, RTP, Rural_Inq, Imports, Violence, Rebellion, Forced_Dis, Coca, 
                   Infrastructure, Agro_HiEdu, Distance, Population, Area)
Campo2<-data.frame(RTP, Rural_Inq, Imports, Violence, Rebellion, Forced_Dis, Poverty)
Campo3<-data.frame(Coca, Infrastructure, Agro_HiEdu, Distance, Population, Area, Poverty)

# Check for variable dependencies
# As all numeric, we will rely on correlation

# Check if there are visible correlations
pairs(Campo2,
      main="Poverty Correlations (g=Low, b=Med, r=High)",
      pch = 21, bg = c("red", "green3", "blue")[unclass(factor(Poverty_clas))])
pairs(Campo3,
      main="Poverty Correlations (g=Low, b=Med, r=High)",
      pch = 21, bg = c("red", "green3", "blue")[unclass(factor(Poverty_clas))])


# No predictor association was identified or need to be removed
# If we deal with continuous vars use correlation function and plots

#   Appendix C correlations first scenario                                  # 
# Get a correlation matrix

corrm<-cor(Campo1)
Corr<-round(corrm, digits=3)
print(Corr)
export(Corr, "correlation_13v.xlsx")

# Corplot

par(mfrow=c(1,1))
install.packages("corrplot", dependencies = TRUE)
library(corrplot)
corrplot(corrm, method = "circle")

# Correlation corgram

install.packages("corrgram", dependencies = TRUE)
library(corrgram)
corrgram(Campo1, order=TRUE,
         main="LGA Poverty",
         lower.panel=panel.shade, upper.panel=panel.pie,
         diag.panel=panel.minmax, text.panel=panel.txt)
corrgram(Campo1, order=TRUE,
         main="LGA Poverty",
         panel=panel.ellipse,
         text.panel=panel.txt, diag.panel=panel.minmax)

# Table 11. Summary statistics 

sum.tab<- subset(Campo1, select=c(Poverty, RTP, Rural_Inq, Imports, Violence, 
                                  Rebellion, Forced_Dis, Coca,Infrastructure, 
                                  Agro_HiEdu, Distance, Population, Area));summary(sum.tab)
sd(Poverty, na.rm = FALSE)
sd(RTP, na.rm = FALSE)
sd(Rural_Inq, na.rm = FALSE)
sd(Imports, na.rm = FALSE)
sd(Violence, na.rm = FALSE)
sd(Rebellion, na.rm = FALSE)
sd(Forced_Dis, na.rm = FALSE)
sd(Coca, na.rm = FALSE)
sd(Infrastructure, na.rm = FALSE)
sd(Agro_HiEdu, na.rm = FALSE)
sd(Distance, na.rm = FALSE)
sd(Population, na.rm = FALSE)
sd(Area, na.rm = FALSE)
var(Poverty, na.rm = FALSE)
var(RTP, na.rm = FALSE)
var(Rural_Inq, na.rm = FALSE)
var(Imports, na.rm = FALSE)
var(Violence, na.rm = FALSE)
var(Rebellion, na.rm = FALSE)
var(Forced_Dis, na.rm = FALSE)
var(Coca, na.rm = FALSE)
var(Infrastructure, na.rm = FALSE)
var(Agro_HiEdu, na.rm = FALSE)
var(Distance, na.rm = FALSE)
var(Population, na.rm = FALSE)
var(Area, na.rm = FALSE)


# Table 12 Structural determinants of rural poverty in Colombia (2012-2018), 
# municipalities OLS models.
## OLS Linear model first scenario

M.1_ols<-lm(Poverty~RTP+Rural_Inq+Imports)
M.2_ols<-lm(Poverty~RTP+Rural_Inq+Imports+Violence+Rebellion+Forced_Dis+
              Coca)
M.3_ols<-lm(Poverty~RTP+Rural_Inq+Imports+Infrastructure+Agro_HiEdu+
              Distance+Population+Area)
M.4_ols<-lm(Poverty~RTP+Rural_Inq+Imports+Violence+Rebellion+Forced_Dis+
              Coca+Infrastructure+Agro_HiEdu+Distance+Population+Area)
stargazer(M.1_ols, M.2_ols, M.3_ols, M.4_ols, type = "text")
Models1to4<-stargazer(M.1_ols, M.2_ols, M.3_ols, M.4_ols, type = "text")
export(Models1to4, "Models1to4.xlsx")
write.csv(Models1to4,"M1-M4.csv", row.names = FALSE)
write.xlsx(Models1to4,"M1-M4.xlsx", row.names=FALSE)
#
### Data set without for the second scenario
#
nrow(Campo)
city.codes.remove<-c("05001","11001","41001","76001", "08001", "15001", "88001", 
                    "73001", "95001", "68001", "20001", "70001", "44001", "50001",
                    "85001", "81001", "52001", "19001", "27001","47001", "13001",
                    "18001","91001", "97001", "99001", "17001", "66001", "63001",
                    "54001","94001", "86001", "23001")

Campo_C<-Campo[!(row.names(Campo) %in% city.codes.remove),]
#
#
Poverty_C<-Campo_C$Rural_Pov_
RTP_C<-Campo_C$RTPI_N
Rural_Inq_C<-Campo_C$Gini
Imports_C<-Campo_C$Impor_N
Violence_C<-Campo_C$Homi_thou
Rebellion_C<-Campo_C$Subver_act
Forced_Dis_C<-Campo_C$Forced_Dis
Coca_C<-Campo_C$Coca_crops
Infrastructure_C<-Campo_C$Infr_Ind
Agro_HiEdu_C<-Campo_C$Agro_Edu
Distance_C<-Campo_C$Distance
Population_C<-Campo_C$Popul
Area_C<-Campo_C$Area
#
Campo_Cities<-data.frame(Poverty_C, RTP_C, Rural_Inq_C, Imports_C, Violence_C,
                         Rebellion_C, Forced_Dis_C, Coca_C, Infrastructure_C,
                         Agro_HiEdu_C, Distance_C, Population_C, Area_C)

# Appendix C variables correlations second scenario                           #
## Get a correlation matrix

corrm<-cor(Campo_Cities)
round(corrm, digits=3)
export(corrm, "corr_without_cities.xlsx")
corrplot(corrm, method = "circle")
corrgram(Campo_Cities, order=TRUE,
         main="Poverty",
         lower.panel=panel.shade, upper.panel=panel.pie,
         diag.panel=panel.minmax, text.panel=panel.txt)
corrgram(Campo_Cities, order=TRUE,
         main="Poverty",
         panel=panel.ellipse,
         text.panel=panel.txt, diag.panel=panel.minmax)

## Table 12 Structural determinants of rural poverty in Colombia (2012-2018), 
# municipalities OLS models.
## OLS Linear model second scenario

M.5_ols<-lm(Poverty_C~RTP_C+Rural_Inq_C+Imports_C)
M.6_ols<-lm(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Violence_C+Rebellion_C+
              Forced_Dis_C+Coca_C)
M.7_ols<-lm(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Infrastructure_C+
              Agro_HiEdu_C+Distance_C+Population_C+Area_C)
M.8_ols<-lm(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Violence_C+Rebellion_C+
              Forced_Dis_C+Coca_C+Infrastructure_C+Agro_HiEdu_C+Distance_C+
              Population_C+Area_C)
stargazer(M.5_ols, M.6_ols, M.7_ols, M.8_ols, type = "text")
Models5to8<-stargazer(M.5_ols, M.6_ols, M.7_ols, M.8_ols, type = "text")
export(Models5to8, "Models8to8.xlsx", row.names=FALSE)
write.csv(Models5to8,"M5-M8.csv", row.names = FALSE)
write.xlsx(Models1to4,"M1-M4.xlsx", row.names=FALSE)
#
#  Validation through Root Mean Squared Error (RMSE)
RMSE<-function(error){ sqrt(mean(error^2)) }
RMSE(M.1_ols$residuals)
RMSE(M.2_ols$residuals)
RMSE(M.3_ols$residuals)
RMSE(M.4_ols$residuals)
RMSE(M.5_ols$residuals)
RMSE(M.6_ols$residuals)
RMSE(M.7_ols$residuals)
RMSE(M.8_ols$residuals)
#
#Appendix D Negative binomial models
## Table 3 Structural determines of rural poverty in Colombia (2012-2018), 
#municipalities Negative Binomial models.
library(MASS)
library(magrittr)
M.1_NB<-glm.nb(Poverty~RTP+Rural_Inq+Imports)
M.2_NB<-glm.nb(Poverty~RTP+Rural_Inq+Imports+Violence+Rebellion+Forced_Dis+
              Coca)
M.3_NB<-glm.nb(Poverty~RTP+Rural_Inq+Imports+Infrastructure+Agro_HiEdu+
              Distance+Population+Area)
M.4_NB<-glm.nb(Poverty~RTP+Rural_Inq+Imports+Violence+Rebellion+Forced_Dis+
              Coca+Infrastructure+Agro_HiEdu+Distance+Population+Area)
Models1to4NB<-stargazer(M.1_NB, M.2_NB, M.3_NB, M.4_NB, type = "text")
write.csv(Models1to4NB,"M1-M4-NB.csv", row.names = FALSE)
#
M.5_NB<-glm.nb(Poverty_C~RTP_C+Rural_Inq_C+Imports_C)
M.6_NB<-glm.nb(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Violence_C+Rebellion_C+
              Forced_Dis_C+Coca_C)
M.7_NB<-glm.nb(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Infrastructure_C+
              Agro_HiEdu_C+Distance_C+Population_C+Area_C)
M.8_NB<-glm.nb(Poverty_C~RTP_C+Rural_Inq_C+Imports_C+Violence_C+Rebellion_C+
              Forced_Dis_C+Coca_C+Infrastructure_C+Agro_HiEdu_C+Distance_C+
              Population_C+Area_C)
Models5to8NB<-stargazer(M.5_NB, M.6_NB, M.7_NB, M.8_NB, type = "text")
write.csv(Models5to8NB,"M5-M8-NB.csv", row.names = FALSE)
#
#Because the values of response variable are not integers, some warnings can emerge!
